# -*- coding: utf-8 -*-
import scrapy
from scrapy.http import Request
from tutorial.items import XsItem


# Crawl one specific novel
class DingdianSpider(scrapy.Spider):
    """Spider that scrapes the chapters of a single novel hosted on 23wx.com.

    ``parse`` reads the chapter index page listed in ``start_urls`` and
    schedules one request per chapter link; ``parse_chapter`` fills in the
    remaining item fields from each chapter page.
    """

    name = "dingdian"
    allowed_domains = ["23wx.com"]
    start_urls = (
        'http://www.23wx.com/html/51/51514/',
    )

    def parse(self, response):
        """Yield one chapter request for each link found in the index table.

        Every request carries a partially filled ``XsItem`` in
        ``meta['item']`` holding the raw link (``chapter_url``) and its
        1-based position in the index (``chapter_id``).
        """
        hrefs = response.xpath('//*[@id="at"]/tr/td/a/@href').extract()

        # NOTE(review): the last link in the table is skipped; presumably it
        # is not a regular chapter -- confirm against the site markup.
        for chapter_id, href in enumerate(hrefs[:-1], start=1):
            item = XsItem()
            item['chapter_url'] = href
            item['chapter_id'] = chapter_id

            # urljoin resolves relative, root-relative and absolute hrefs
            # against the page URL, which plain string concatenation does not.
            yield Request(
                response.urljoin(href),
                meta={'item': item},
                callback=self.parse_chapter,
            )

    def parse_chapter(self, response):
        """Complete the item passed via ``meta`` with the chapter page data.

        Returns the populated item, or ``None`` (after logging a warning)
        when the page lacks the book name, chapter title or content node.
        """
        book_name = response.xpath(
            '//*[@id="amain"]/dl/dt/a[3]/text()').extract_first()
        chapter_name = response.xpath(
            '//div[@id="amain"]/dl/dd[1]/h1/text()').extract_first()
        chapter_content = response.xpath(
            '//dd[@id="contents"]').extract_first()

        # Skip pages with unexpected markup instead of failing with a bare
        # IndexError; either way no incomplete item reaches the pipelines.
        if book_name is None or chapter_name is None or chapter_content is None:
            self.logger.warning(
                "Skipping %s: expected chapter markup not found", response.url)
            return None

        # Strip the line breaks and the wrapping <dd> element so that only
        # the chapter text remains.
        for markup in ('<br>', '<dd id="contents">', '</dd>'):
            chapter_content = chapter_content.replace(markup, '')

        item = response.meta['item']
        item['book_name'] = book_name
        item['chapter_name'] = chapter_name
        item['chapter_content'] = chapter_content
        return item
